Map
# Load the geotagged tweet exports for the three movies.
# read.csv() already returns a data.frame, so the original
# `x <- data.frame(x)` re-wrapping was a no-op and has been dropped.
thor <- read.csv("Thor3_tweets.csv", header = TRUE, sep = ",")
cap <- read.csv("Cap3_tweets.csv", header = TRUE, sep = ",")
aven <- read.csv("Avengers3_tweets.csv", header = TRUE, sep = ",")
Thor
# Thor: Ragnarok -- base map with CartoDB Voyager raster tiles.
thor_map <- leaflet(thor) %>%
  addTiles('http://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}.png',
attribution='Map tiles by <a href="http://stamen.com">Stamen Design</a>,
<a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a> —
Map data © <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>')
# One semi-transparent red circle per geotagged tweet; the popup shows
# the tweet's longitude value (same behavior as before).
thor_map %>%
  addCircles(~place_lon, ~place_lat,
             popup = thor$place_lon,
             weight = 5, radius = 40,
             color = "red", stroke = TRUE, fillOpacity = 0.5)
CaptainAmerica
# Captain America: Civil War -- base map with CartoDB Voyager tiles.
cap_map <- leaflet(cap) %>%
  addTiles('http://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}.png',
attribution='Map tiles by <a href="http://stamen.com">Stamen Design</a>,
<a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a> —
Map data © <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>')
# One royal-blue circle per geotagged tweet; popup shows the longitude.
cap_map %>%
  addCircles(~place_lon, ~place_lat,
             popup = cap$place_lon,
             weight = 5, radius = 50,
             color = "royalblue", stroke = TRUE, fillOpacity = 0.8)
Avengers
# Avengers: Infinity War -- base map with CartoDB Voyager tiles.
aven_map <- leaflet(aven) %>%
addTiles('http://{s}.basemaps.cartocdn.com/rastertiles/voyager/{z}/{x}/{y}.png',
attribution='Map tiles by <a href="http://stamen.com">Stamen Design</a>,
<a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a> —
Map data © <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>')
# BUG FIX: the popup previously read thor$lon -- the wrong data frame
# (thor, not aven) and a column name (`lon`) the other maps don't use.
# Use this dataset's own longitude, consistent with the thor/cap maps.
aven_map %>% addCircles(~place_lon, ~place_lat,
                        popup = aven$place_lon, weight = 5, radius = 50,
                        color = "thistle", stroke = TRUE, fillOpacity = 0.8)
Emoji
# Load the three tweet datasets used for the emoji analysis.
# read.csv() already returns a data.frame, so the original
# `x <- data.frame(x)` re-wrapping was a no-op and has been dropped.
tweets1 <- read.csv("tweets1.csv", header = TRUE, sep = ",")
tweets2 <- read.csv("tweets2.csv", header = TRUE, sep = ",")
tweets3 <- read.csv("tweets3.csv", header = TRUE, sep = ",")
Emoji dictionary
# Build the emoji lookup table `emojis` by merging two public
# dictionaries on emoji name.
# Source: Lauren Ancona; https://github.com/laurenancona/twimoji/tree/master/twitterEmojiProject
emdict.la <- read.csv('emoticon_conversion_noGraphic.csv', header = F)
emdict.la <- emdict.la[-1, ]          # first row is a header stored as data
row.names(emdict.la) <- NULL
names(emdict.la) <- c('unicode', 'bytes', 'name')
emdict.la$emojiid <- row.names(emdict.la)  # sequential id from row order
# Source: Jessica Peterka-Bonetta; http://opiateforthemass.es/articles/emoticons-in-R/
emdict.jpb <- read.csv('emDict.csv', header = F)
emdict.jpb <- emdict.jpb[-1, ]
row.names(emdict.jpb) <- NULL
names(emdict.jpb) <- c('name', 'bytes', 'rencoding')
emdict.jpb$name <- tolower(emdict.jpb$name)  # normalize names for the join
emdict.jpb$bytes <- NULL                     # keep only name + R encoding
# Inner join on `name`; restore numeric-id order after the merge.
emojis <- merge(emdict.la, emdict.jpb, by = 'name')
emojis$emojiid <- as.numeric(emojis$emojiid)
emojis <- arrange(emojis, emojiid)
Thor
#create full tweets by emojis matrix
# Each cell holds regexpr()'s match position of one emoji's byte
# encoding inside one tweet's text (-1 = emoji absent).
# NOTE: the original pre-allocated an NA matrix here, but sapply()
# replaces it wholesale (and the allocation even used the wrong ncol),
# so the dead allocation has been removed.
system.time(df.s <- sapply(emojis$rencoding, regexpr, tweets1$text,
                           ignore.case = TRUE, useBytes = TRUE))
## user system elapsed
## 1.648 0.022 1.698
rownames(df.s) <- seq_len(nrow(df.s))
colnames(df.s) <- seq_len(ncol(df.s))
df.t <- data.frame(df.s)
df.t$tweetid <- tweets1$tweetid
# merge in hashtag data from original tweets dataset
df.a <- subset(tweets1, select = c(tweetid, hashtag))
df.u <- merge(df.t, df.a, by = 'tweetid')
df.u$z <- 1
df.u <- arrange(df.u, tweetid)
tweets.emojis.matrix <- df.u
## create emoji count dataset
# The emoji columns sit right after `tweetid` (column 1) and before the
# trailing `hashtag`/`z` columns. Derive the range from the dictionary
# size instead of hard-coding 2:843, so a dictionary update cannot
# silently mis-slice the matrix.
emoji.cols <- 2:(nrow(emojis) + 1)
df <- tweets.emojis.matrix[, emoji.cols]
count <- colSums(df > -1)  # number of tweets containing each emoji
emojis.m <- cbind(count, emojis)
emojis.m <- arrange(emojis.m, desc(count))
# Keep emojis seen in more than one tweet; dens = rate per 1,000 tweets,
# dens.sm = add-one smoothed share.
emojis.count <- subset(emojis.m, count > 1)
emojis.count$dens <- round(1000 * (emojis.count$count / nrow(tweets1)), 1)
emojis.count$dens.sm <- (emojis.count$count + 1) / (nrow(tweets1) + 1)
emojis.count$rank <- as.numeric(row.names(emojis.count))
emojis.count.p <- subset(emojis.count, select = c(name, dens, count, rank))
# print summary stats
subset(emojis.count.p, rank <= 10)
## name dens count rank
## 1 heavy black heart 26.0 14 1
## 2 face with tears of joy 16.7 9 2
## 3 dash symbol 14.8 8 3
## 4 smiling face with sunglasses 14.8 8 4
## 5 loudly crying face 13.0 7 5
## 6 fire 11.1 6 6
## 7 sparkles 9.3 5 7
## 8 musical score 9.3 5 8
## 9 purple heart 9.3 5 9
## 10 smiling face with heart-shaped eyes 7.4 4 10
num.tweets <- nrow(tweets1)
df.t <- rowSums(tweets.emojis.matrix[, emoji.cols] > -1)  # emojis per tweet
num.tweets.with.emojis <- length(df.t[df.t > 0])
num.emojis <- sum(emojis.count$count)
#min(tweets1$created); max(tweets1$created); median(tweets1$created);
num.tweets; num.tweets.with.emojis; round(100 * (num.tweets.with.emojis / num.tweets), 1); num.emojis; nrow(emojis.count)
## [1] 539
## [1] 84
## [1] 15.6
## [1] 133
## [1] 34
#MAKE BAR CHART OF TOP EMOJIS
df.plot <- subset(emojis.count.p, rank <= 10)
# Renamed so these labels no longer shadow ggplot2::xlab()/ylab().
x.lab <- 'Rank'
y.lab <- 'Overall Frequency (per 1,000 Tweets)'
# NOTE(review): setwd() inside a script is fragile -- the PNG lookups
# below depend on it; prefer file.path() with an explicit directory.
setwd("~/Documents/R/twitter project/ios_9_3_emoji_files")
# Sort by emoji name so rows line up with the images loaded below.
# (The original called arrange(df.plot, name) twice; once is enough.)
df.plot <- arrange(df.plot, name)
# Load one PNG per emoji and wrap each as a grob for annotation_custom().
imgs <- lapply(paste0(df.plot$name, '.png'), png::readPNG)
g <- lapply(imgs, grid::rasterGrob)
# Icon size scales with the tallest bar so images stay readable.
k <- 0.20 * (10 / nrow(df.plot)) * max(df.plot$dens)
df.plot$xsize <- k
df.plot$ysize <- k
write.csv(df.plot, "emoji1.csv", row.names = FALSE)
g1 <- ggplot(data = df.plot, aes(x = rank, y = dens)) +
  geom_bar(stat = 'identity', fill = 'dodgerblue4') +
  xlab(x.lab) + ylab(y.lab) +
  # Place each emoji image centered at the top of its bar.
  mapply(function(x, y, i) {
    annotation_custom(g[[i]],
                      xmin = x - 0.5 * df.plot$xsize[i],
                      xmax = x + 0.5 * df.plot$xsize[i],
                      ymin = y - 0.5 * df.plot$ysize[i],
                      ymax = y + 0.5 * df.plot$ysize[i])
  }, df.plot$rank, df.plot$dens, seq_len(nrow(df.plot))) +
  scale_x_continuous(expand = c(0, 0), breaks = seq(1, nrow(df.plot), 1),
                     labels = seq(1, nrow(df.plot), 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.10 * max(df.plot$dens))) +
  labs(title = "Emoji for ThorRagnarok") +
  theme(panel.grid.minor.y = element_blank(),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 14),
        axis.text.x = element_text(size = 8, colour = 'black'),
        axis.text.y = element_text(size = 8, colour = 'black'))
g1

Captain America
#create full tweets by emojis matrix
# Same pipeline as the Thor section, applied to tweets2.
# NOTE: the dead NA-matrix pre-allocation (immediately overwritten by
# sapply()) has been removed.
system.time(df.s <- sapply(emojis$rencoding, regexpr, tweets2$text,
                           ignore.case = TRUE, useBytes = TRUE))
## user system elapsed
## 0.296 0.003 0.304
rownames(df.s) <- seq_len(nrow(df.s))
colnames(df.s) <- seq_len(ncol(df.s))
df.t <- data.frame(df.s)
df.t$tweetid <- tweets2$tweetid
# merge in hashtag data from original tweets dataset
df.a <- subset(tweets2, select = c(tweetid, hashtag))
df.u <- merge(df.t, df.a, by = 'tweetid')
df.u$z <- 1
df.u <- arrange(df.u, tweetid)
tweets.emojis.matrix <- df.u
## create emoji count dataset
# Emoji columns follow `tweetid` (col 1); derive the range from the
# dictionary size instead of hard-coding 2:843.
emoji.cols <- 2:(nrow(emojis) + 1)
df <- tweets.emojis.matrix[, emoji.cols]
count <- colSums(df > -1)  # number of tweets containing each emoji
emojis.m <- cbind(count, emojis)
emojis.m <- arrange(emojis.m, desc(count))
emojis.count <- subset(emojis.m, count > 1)
emojis.count$dens <- round(1000 * (emojis.count$count / nrow(tweets2)), 1)
emojis.count$dens.sm <- (emojis.count$count + 1) / (nrow(tweets2) + 1)
emojis.count$rank <- as.numeric(row.names(emojis.count))
emojis.count.p <- subset(emojis.count, select = c(name, dens, count, rank))
# print summary stats
subset(emojis.count.p, rank <= 10)
## name dens count rank
## 1 loudly crying face 24.4 2 1
num.tweets <- nrow(tweets2)
df.t <- rowSums(tweets.emojis.matrix[, emoji.cols] > -1)  # emojis per tweet
num.tweets.with.emojis <- length(df.t[df.t > 0])
num.emojis <- sum(emojis.count$count)
#min(tweets2$created); max(tweets2$created); median(tweets2$created);
num.tweets; num.tweets.with.emojis; round(100 * (num.tweets.with.emojis / num.tweets), 1); num.emojis; nrow(emojis.count)
## [1] 82
## [1] 7
## [1] 8.5
## [1] 2
## [1] 1
#MAKE BAR CHART OF TOP EMOJIS
df.plot <- subset(emojis.count.p, rank <= 10)
# Renamed so these labels no longer shadow ggplot2::xlab()/ylab().
x.lab <- 'Rank'
y.lab <- 'Overall Frequency (per 1,000 Tweets)'
# NOTE(review): setwd() inside a script is fragile -- the PNG lookups
# below depend on it; prefer file.path() with an explicit directory.
setwd("~/Documents/R/twitter project/ios_9_3_emoji_files")
# Sort by emoji name so rows line up with the images loaded below.
# (The original called arrange(df.plot, name) twice; once is enough.)
df.plot <- arrange(df.plot, name)
imgs <- lapply(paste0(df.plot$name, '.png'), png::readPNG)
g <- lapply(imgs, grid::rasterGrob)
# Icon size scales with the tallest bar so images stay readable.
k <- 0.20 * (10 / nrow(df.plot)) * max(df.plot$dens)
df.plot$xsize <- k
df.plot$ysize <- k
write.csv(df.plot, "emoji2.csv", row.names = FALSE)
g2 <- ggplot(data = df.plot, aes(x = rank, y = dens)) +
  geom_bar(stat = 'identity', fill = 'dodgerblue4') +
  xlab(x.lab) + ylab(y.lab) +
  # Place each emoji image centered at the top of its bar.
  mapply(function(x, y, i) {
    annotation_custom(g[[i]],
                      xmin = x - 0.5 * df.plot$xsize[i],
                      xmax = x + 0.5 * df.plot$xsize[i],
                      ymin = y - 0.5 * df.plot$ysize[i],
                      ymax = y + 0.5 * df.plot$ysize[i])
  }, df.plot$rank, df.plot$dens, seq_len(nrow(df.plot))) +
  scale_x_continuous(expand = c(0, 0), breaks = seq(1, nrow(df.plot), 1),
                     labels = seq(1, nrow(df.plot), 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.10 * max(df.plot$dens))) +
  labs(title = "Emoji for CaptainAmericaCivilWar") +
  theme(panel.grid.minor.y = element_blank(),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 14),
        axis.text.x = element_text(size = 8, colour = 'black'),
        axis.text.y = element_text(size = 8, colour = 'black'))
g2

InfinityWar
# Create full tweets-by-emojis matrix (same pipeline as the Thor and
# Captain America sections, applied to tweets3).
# NOTE: the dead NA-matrix pre-allocation (immediately overwritten by
# sapply()) has been removed.
system.time(df.s <- sapply(emojis$rencoding, regexpr, tweets3$text,
                           ignore.case = TRUE, useBytes = TRUE))
## user system elapsed
## 1.769 0.012 1.799
rownames(df.s) <- seq_len(nrow(df.s))
colnames(df.s) <- seq_len(ncol(df.s))
df.t <- data.frame(df.s)
df.t$tweetid <- tweets3$tweetid
# merge in hashtag data from original tweets dataset
df.a <- subset(tweets3, select = c(tweetid, hashtag))
df.u <- merge(df.t, df.a, by = 'tweetid')
df.u$z <- 1
df.u <- arrange(df.u, tweetid)
tweets.emojis.matrix <- df.u
## create emoji count dataset
# Emoji columns follow `tweetid` (col 1); derive the range from the
# dictionary size instead of hard-coding 2:843.
emoji.cols <- 2:(nrow(emojis) + 1)
df <- tweets.emojis.matrix[, emoji.cols]
count <- colSums(df > -1)  # number of tweets containing each emoji
emojis.m <- cbind(count, emojis)
emojis.m <- arrange(emojis.m, desc(count))
emojis.count <- subset(emojis.m, count > 1)
emojis.count$dens <- round(1000 * (emojis.count$count / nrow(tweets3)), 1)
emojis.count$dens.sm <- (emojis.count$count + 1) / (nrow(tweets3) + 1)
emojis.count$rank <- as.numeric(row.names(emojis.count))
emojis.count.p <- subset(emojis.count, select = c(name, dens, count, rank))
# print summary stats
subset(emojis.count.p, rank <= 10)
## name dens count rank
## 1 smiling face with heart-shaped eyes 33.3 19 1
## 2 face with tears of joy 22.8 13 2
## 3 heavy black heart 14.0 8 3
## 4 heart with ribbon 12.3 7 4
## 5 ship 10.5 6 5
## 6 alarm clock 10.5 6 6
## 7 two hearts 10.5 6 7
## 8 bomb 10.5 6 8
## 9 earth globe europe-africa 10.5 6 9
## 10 sparkles 8.8 5 10
num.tweets <- nrow(tweets3)
df.t <- rowSums(tweets.emojis.matrix[, emoji.cols] > -1)  # emojis per tweet
num.tweets.with.emojis <- length(df.t[df.t > 0])
num.emojis <- sum(emojis.count$count)
#min(tweets3$created); max(tweets3$created); median(tweets3$created);
num.tweets; num.tweets.with.emojis; round(100 * (num.tweets.with.emojis / num.tweets), 1); num.emojis; nrow(emojis.count)
## [1] 570
## [1] 82
## [1] 14.4
## [1] 132
## [1] 27
#MAKE BAR CHART OF TOP EMOJIS
df.plot <- subset(emojis.count.p, rank <= 10)
# Renamed so these labels no longer shadow ggplot2::xlab()/ylab().
x.lab <- 'Rank'
y.lab <- 'Overall Frequency (per 1,000 Tweets)'
# NOTE(review): setwd() inside a script is fragile -- the PNG lookups
# below depend on it; prefer file.path() with an explicit directory.
setwd("~/Documents/R/twitter project/ios_9_3_emoji_files")
# Sort by emoji name so rows line up with the images loaded below.
# (The original called arrange(df.plot, name) twice; once is enough.)
df.plot <- arrange(df.plot, name)
imgs <- lapply(paste0(df.plot$name, '.png'), png::readPNG)
g <- lapply(imgs, grid::rasterGrob)
# Icon size scales with the tallest bar so images stay readable.
k <- 0.20 * (10 / nrow(df.plot)) * max(df.plot$dens)
df.plot$xsize <- k
df.plot$ysize <- k
write.csv(df.plot, "emoji3.csv", row.names = FALSE)
g3 <- ggplot(data = df.plot, aes(x = rank, y = dens)) +
  geom_bar(stat = 'identity', fill = 'dodgerblue4') +
  xlab(x.lab) + ylab(y.lab) +
  # Place each emoji image centered at the top of its bar.
  mapply(function(x, y, i) {
    annotation_custom(g[[i]],
                      xmin = x - 0.5 * df.plot$xsize[i],
                      xmax = x + 0.5 * df.plot$xsize[i],
                      ymin = y - 0.5 * df.plot$ysize[i],
                      ymax = y + 0.5 * df.plot$ysize[i])
  }, df.plot$rank, df.plot$dens, seq_len(nrow(df.plot))) +
  scale_x_continuous(expand = c(0, 0), breaks = seq(1, nrow(df.plot), 1),
                     labels = seq(1, nrow(df.plot), 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.10 * max(df.plot$dens))) +
  labs(title = "Emoji for InfinityWar") +
  theme(panel.grid.minor.y = element_blank(),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 14),
        axis.text.x = element_text(size = 8, colour = 'black'),
        axis.text.y = element_text(size = 8, colour = 'black'))
g3

Word Cloud Analysis
Read the CSV files of tweet text
# Return to the project root where the tweet-text CSV exports live.
# NOTE(review): the relative read.csv() calls below depend on this setwd().
setwd("~/Documents/R/twitter project")
thort <- read.csv("Thortext.csv", header = TRUE, sep = ",")
capt <- read.csv("Captext.csv", header = TRUE, sep = ",")
avent <- read.csv("Avengerstext.csv", header = TRUE, sep = ",")
Thor
#data manipulation
# Strip non-ASCII characters (emoji, accents) so tm can tokenize cleanly.
thortext <- sapply(thort$text, function(row) iconv(row, "latin1", "ASCII", sub = ""))
# remove retweet entities
thortext <- gsub('(RT|via)((?:\\b\\W*@\\w+)+)', '', thortext)
# remove at people
thortext <- gsub('@\\w+', '', thortext)
# remove punctuation
thortext <- gsub('[[:punct:]]', '', thortext)
# remove numbers
thortext <- gsub('[[:digit:]]', '', thortext)
# remove html links (punctuation removal above has already collapsed
# URLs into single "http..." tokens, which this pattern matches whole)
thortext <- gsub('http\\w+', '', thortext)
# Collapse runs of spaces/tabs to a single space.
# BUG FIX: the original replaced them with '' which glued adjacent
# words together (e.g. "great  movie" became "greatmovie").
thortext <- gsub('[ \t]{2,}', ' ', thortext)
thortext <- gsub('^\\s+|\\s+$', '', thortext)
th_corpus <- Corpus(VectorSource(thortext))
# Term-document matrix with English + Twitter-specific stopwords removed.
tdm <- TermDocumentMatrix(
  th_corpus,
  control = list(
    stopwords = c("just","now","much","ever","got","http","can","rt","htt",stopwords("english")),
    tolower = TRUE))
m <- as.matrix(tdm)
word_freqs <- sort(rowSums(m), decreasing = TRUE)
# create a data frame with words and their frequencies
dm <- data.frame(word = names(word_freqs), freq = word_freqs)
write.csv(dm, "wc1.csv", row.names = FALSE)
# wordcloud() draws as a side effect and returns NULL; the original
# captured that NULL in `thorwc` and then printed it (the "## NULL"
# output), so the pointless assignment has been dropped.
wordcloud(dm$word, dm$freq, random.order = FALSE, min.freq = 5,
          colors = brewer.pal(8, "Dark2"))
CaptainAmerica
#data manipulation
# Strip non-ASCII characters (emoji, accents) so tm can tokenize cleanly.
captext <- sapply(capt$text, function(row) iconv(row, "latin1", "ASCII", sub = ""))
# remove retweet entities
captext <- gsub('(RT|via)((?:\\b\\W*@\\w+)+)', '', captext)
# remove at people
captext <- gsub('@\\w+', '', captext)
# remove punctuation
captext <- gsub('[[:punct:]]', '', captext)
# remove numbers
captext <- gsub('[[:digit:]]', '', captext)
# remove html links (URLs were collapsed into "http..." tokens by the
# punctuation step above)
captext <- gsub('http\\w+', '', captext)
# Collapse runs of spaces/tabs to a single space.
# BUG FIX: the original replaced them with '' which glued adjacent
# words together.
captext <- gsub('[ \t]{2,}', ' ', captext)
captext <- gsub('^\\s+|\\s+$', '', captext)
cap_corpus <- Corpus(VectorSource(captext))
# Term-document matrix; "httpstco" is a leftover t.co URL fragment.
tdmc <- TermDocumentMatrix(
  cap_corpus,
  control = list(
    stopwords = c("just","now","much","ever","got","http","can","rt","htt","httpstco",stopwords("english")),
    tolower = TRUE))
mc <- as.matrix(tdmc)
word_freqsc <- sort(rowSums(mc), decreasing = TRUE)
# create a data frame with words and their frequencies
dmc <- data.frame(word = names(word_freqsc), freq = word_freqsc)
write.csv(dmc, "wc2.csv", row.names = FALSE)
# wordcloud() draws as a side effect and returns NULL; the original
# captured that NULL in `capwc` and then printed it (the "## NULL"
# output), so the pointless assignment has been dropped.
wordcloud(dmc$word, dmc$freq, random.order = FALSE, min.freq = 5,
          colors = brewer.pal(8, "Dark2"))
Avengers
#data manipulation
# Strip non-ASCII characters (emoji, accents) so tm can tokenize cleanly.
aventext <- sapply(avent$text, function(row) iconv(row, "latin1", "ASCII", sub = ""))
# remove retweet entities
aventext <- gsub('(RT|via)((?:\\b\\W*@\\w+)+)', '', aventext)
# remove at people
aventext <- gsub('@\\w+', '', aventext)
# remove punctuation
aventext <- gsub('[[:punct:]]', '', aventext)
# remove numbers
aventext <- gsub('[[:digit:]]', '', aventext)
# remove html links (URLs were collapsed into "http..." tokens by the
# punctuation step above)
aventext <- gsub('http\\w+', '', aventext)
# Collapse runs of spaces/tabs to a single space.
# BUG FIX: the original replaced them with '' which glued adjacent
# words together.
aventext <- gsub('[ \t]{2,}', ' ', aventext)
aventext <- gsub('^\\s+|\\s+$', '', aventext)
aven_corpus <- Corpus(VectorSource(aventext))
# Term-document matrix; "may" is filtered as a release-month artifact.
tdma <- TermDocumentMatrix(
  aven_corpus,
  control = list(
    stopwords = c("just","now","much","ever","got","http","can","rt","htt","may",stopwords("english")),
    tolower = TRUE))
ma <- as.matrix(tdma)
word_freqsa <- sort(rowSums(ma), decreasing = TRUE)
# create a data frame with words and their frequencies
dma <- data.frame(word = names(word_freqsa), freq = word_freqsa)
write.csv(dma, "wc3.csv", row.names = FALSE)
# wordcloud() draws as a side effect and returns NULL; the original
# captured that NULL in `avenwc` and then printed it (the "## NULL"
# output), so the pointless assignment has been dropped.
wordcloud(dma$word, dma$freq, random.order = FALSE, min.freq = 5,
          colors = brewer.pal(8, "Dark2"))